library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
library(readr)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(highcharter)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
Highcharts (www.highcharts.com) is a Highsoft software product which is
not free for commercial and Governmental use
library(magrittr)

Attaching package: ‘magrittr’

The following object is masked from ‘package:purrr’:

    set_names

The following object is masked from ‘package:tidyr’:

    extract

EDA

Data loading

data <- read_csv("https://raw.githubusercontent.com/Alexburk93/Data_Wrangling_EDA/main/data/suicide_analysis.csv")
New names:Rows: 894 Columns: 22── Column specification ────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (13): Country name, Code, Indicator Name, Indicator Code, VAR, Variable, MEA, Measure, ISIC4...17, ISIC4...18, U...
dbl  (9): Year, Age-standardized suicide rate - Sex: both sexes, Life Ladder, Social support, Healthy life expectanc...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
                data %>% sample_n(19)

Renaming variables and drop unwanted columns

# renaming the columns 
data <- data %>%
  rename(`Country_name` = `Country name`,
         `Suicide_Rate` = `Age-standardized suicide rate - Sex: both sexes`,
         `Life_ladder` = `Life Ladder`,
         `Social_support` = `Social support`,
         `Life_expectancy` = `Healthy life expectancy at birth`,
         `Freedom_choices` = `Freedom to make life choices`,
         `Corruption` = `Perceptions of corruption`,
         `Indicator_name` = `Indicator Name`,
         `Indicator_code` = `Indicator Code`)

# drop columns 
# remove the original 'Time' column
data <- select(data, -Variable, -ISIC4...17, -ISIC4...18, -VAR, -MEA, -`Unit Code`)  
data

Data exploration

head(data)
# change names
names(data) <- make.names(names(data))
# dimensions of the dataframe
nrow(data)
[1] 894
ncol(data)
[1] 16
dim(data)
[1] 894  16
# check the structure of the object
str(data)
tibble [894 × 16] (S3: tbl_df/tbl/data.frame)
 $ Country_name   : chr [1:894] "Australia" "Australia" "Australia" "Australia" ...
 $ Code           : chr [1:894] "AUS" "AUS" "AUS" "AUS" ...
 $ Year           : num [1:894] 2011 2011 2011 2011 2011 ...
 $ Suicide_Rate   : num [1:894] 10.1 10.1 10.1 10.1 11 ...
 $ Life_ladder    : num [1:894] 7.41 7.41 7.41 7.41 7.19 ...
 $ Social_support : num [1:894] 0.967 0.967 0.967 0.967 0.954 ...
 $ Life_expectancy: num [1:894] 72.3 72.3 72.3 72.3 72.1 ...
 $ Freedom_choices: num [1:894] 0.945 0.945 0.945 0.945 0.935 ...
 $ Corruption     : num [1:894] 0.382 0.382 0.382 0.382 0.269 ...
 $ Indicator_name : chr [1:894] "GDP (current US$)" "GDP (current US$)" "GDP (current US$)" "GDP (current US$)" ...
 $ Indicator_code : chr [1:894] "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" ...
 $ GDP            : num [1:894] 1.40e+12 1.40e+12 1.40e+12 1.40e+12 1.68e+11 ...
 $ Measure        : chr [1:894] "Index 2007=100" "Index 2007=100" "Index 2007=100" "Index 2007=100" ...
 $ Unit           : chr [1:894] "Index" "Index" "Index" "Index" ...
 $ Value          : num [1:894] 132 138 143 145 122 ...
 $ Quarter        : chr [1:894] "Q1" "Q2" "Q3" "Q4" ...
# look at columns 6, 7 and 10
head(data[ , c(2, 4:6, 12, 15)])
# look at columns 6, 7 and 10
tail(data[ , c(1, 3, 9)])
table(data$Year)

2011 2012 2013 2014 2015 2016 2017 2018 2019 
 100  100  100  100  100  100  100   97   97 
data %>% 
  select(Country_name) %>% 
  unique() %>% 
  nrow()
[1] 19
unique(data$Country_name)
 [1] "Australia"      "New Zealand"    "United States"  "Spain"          "Netherlands"    "France"        
 [7] "Finland"        "Belgium"        "Japan"          "South Africa"   "Iceland"        "Norway"        
[13] "Sweden"         "Italy"          "Brazil"         "United Kingdom" "Germany"        "Canada"        
[19] "Denmark"       
unique(data$Year)
[1] 2011 2012 2013 2014 2015 2016 2017 2018 2019

Interactive maps

# Set highcharter options for tooltip decimals
options(highcharter.tooltip.valueDecimals = 2)

# Create highcharter map visualization
hc <- highchart() %>%
  hc_add_series_map(
    worldgeojson, data, value = "GDP", 
    joinBy = c('name', 'Country_name'),
    name = "GDP (current US$)"
  )  %>% 
  hc_colorAxis(stops = color_stops()) %>% 
  hc_title(text = "World Map") %>% 
  hc_subtitle(text = "GDP in current US$")

hc
# Set highcharter options for tooltip decimals
options(highcharter.tooltip.valueDecimals = 2)

# Create map visualizations for each variable
hc_life_expectancy <- highchart() %>%
  hc_add_series_map(
    worldgeojson, data, 
    value = "Life_expectancy", 
    joinBy = c('name', 'Country_name'),
    name = "Life Expectancy"
  ) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_title(text = "World Map") %>%
  hc_subtitle(text = "Life Expectancy")

hc_suicide_rates <- highchart() %>%
  hc_add_series_map(
    worldgeojson, data, 
    value = "Suicide_Rate", 
    joinBy = c('name', 'Country_name'),
    name = "Suicide Rates"
  ) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_title(text = "World Map") %>%
  hc_subtitle(text = "Suicide Rate")

hc_corruption <- highchart() %>%
  hc_add_series_map(
    worldgeojson, data, 
    value = "Corruption", 
    joinBy = c('name', 'Country_name'),
    name = "Corruption"
  ) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_title(text = "World Map") %>%
  hc_subtitle(text = "Corruption")

# Display the map visualizations
list(hc_life_expectancy, hc_suicide_rates, hc_corruption)
[[1]]

[[2]]

[[3]]
NA

AVG GDP over years

Calculation

avg_gdp_per_year <- data %>% 
  group_by (`Year`) %>% 
  summarise(avg_gpd = mean(`GDP`))

avg_gdp_per_year

Plot

ggplot(avg_gdp_per_year, aes(x = Year, y = avg_gpd)) +
  geom_line(color = "blue") +  
  labs(title = "Average GDP Over Time worldwide",
       x = "Year",
       y = "GDP") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_gdp_per_year$Year), max(avg_gdp_per_year$Year), by = 1))

AVG Happiness over years

Calculation

avg_happiness_per_year <- data %>% 
  group_by (`Year`) %>% 
  summarise(avg_happinnes = mean(`Life_ladder`, na.rm = T))

avg_happiness_per_year

Plot

ggplot(avg_happiness_per_year, aes(x = Year, y = avg_happinnes)) +
  geom_line(color = "blue") +  
  labs(title = "Average Happiness Over Time worldwide",
       x = "Year",
       y = "Happiness") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_happiness_per_year$Year), max(avg_happiness_per_year$Year), by = 1))

AVG Suicide Rates over years

Calculation

avg_Suicide_Rate_per_year <- data %>% 
  group_by (`Year`) %>% 
  summarise(avg_Suicide_Rate = mean(`Suicide_Rate`, na.rm = T))

avg_Suicide_Rate_per_year

Plot

ggplot(avg_Suicide_Rate_per_year, aes(x = Year, y = avg_Suicide_Rate)) +
  geom_line(color = "blue") +  
  labs(title = "Average Suicide Rate Over Time worldwide",
       x = "Year",
       y = "Suicide Rate") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_Suicide_Rate_per_year$Year), max(avg_Suicide_Rate_per_year$Year), by = 1))

AVG Bankruptcies over years

Calculation

avg_Bankruptcies_per_year <- data %>% 
  group_by (`Year`) %>% 
  summarise(avg_Bankruptcies = mean(`Value`, na.rm = T))

avg_Bankruptcies_per_year

Plot

ggplot(avg_Bankruptcies_per_year, aes(x = Year, y = avg_Bankruptcies)) +
  geom_line(color = "blue") +  
  labs(title = "Average Bankruptcies Over Time worldwide",
       x = "Year",
       y = "Bankruptcies") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_Bankruptcies_per_year$Year), max(avg_Bankruptcies_per_year$Year), by = 1))

Plot Average GDP and Average Suicide Rate over years

# Finding the ratio for scaling the second axis
ratio <- max(avg_gdp_per_year$avg_gpd) / max(avg_Suicide_Rate_per_year$avg_Suicide_Rate)

# Creating the base plot
ggplot() +
  # Adding the bar plot for GDP
  geom_bar(data = avg_gdp_per_year, aes(x = Year, y = avg_gpd), stat = "identity", fill = "skyblue", width = 0.2) +
  # Adding the line plot for Average Happiness
  geom_line(data = avg_Suicide_Rate_per_year, aes(x = Year, y = avg_Suicide_Rate * ratio), color = "red", size = 1.5) +
  # Enhancing the plot
  labs(title = "Average GDP and Suicide Rate Over Time",
       x = "Year",
       y = "Average GDP") +
  scale_y_continuous(sec.axis = sec_axis(~ . / ratio, name = "Average Suicide Rate")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(c(avg_gdp_per_year$Year, avg_Suicide_Rate_per_year$Year)), 
                                  max(c(avg_gdp_per_year$Year, avg_Suicide_Rate_per_year$Year)), by = 1))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.

Plot Average Happiness and Average Suicide Rate over years

# Finding the ratio for scaling the second axis
ratio <- max(avg_happiness_per_year$avg_happinnes) / max(avg_Suicide_Rate_per_year$avg_Suicide_Rate)

# Creating the base plot
ggplot() +
  # Adding the bar plot for GDP
  geom_bar(data = avg_happiness_per_year, aes(x = Year, y = avg_happinnes), stat = "identity", fill = "skyblue", width = 0.2) +
  # Adding the line plot for Average Happiness
  geom_line(data = avg_Suicide_Rate_per_year, aes(x = Year, y = avg_Suicide_Rate * ratio), color = "red", size = 1.5) +
  # Enhancing the plot
  labs(title = "Average Happinness and Suicide Rate Over Time",
       x = "Year",
       y = "Average Happinness") +
  scale_y_continuous(sec.axis = sec_axis(~ . / ratio, name = "Average Suicide Rate")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(c(avg_happiness_per_year$Year, avg_Suicide_Rate_per_year$Year)), 
                                  max(c(avg_happiness_per_year$Year, avg_Suicide_Rate_per_year$Year)), by = 1))

Plot Average Bankruptcies and Average Suicide Rate over years

# Finding the ratio for scaling the second axis
ratio <- max(avg_Bankruptcies_per_year$avg_Bankruptcies) / max(avg_Suicide_Rate_per_year$avg_Suicide_Rate)

# Creating the base plot
ggplot() +
  # Adding the bar plot for GDP
  geom_bar(data = avg_Bankruptcies_per_year, aes(x = Year, y = avg_Bankruptcies), stat = "identity", fill = "skyblue", width = 0.2) +
  # Adding the line plot for Average Happiness
  geom_line(data = avg_Suicide_Rate_per_year, aes(x = Year, y = avg_Suicide_Rate * ratio), color = "red", size = 1.5) +
  # Enhancing the plot
  labs(title = "Average Bankruptcies and Suicide Rate Over Time",
       x = "Year",
       y = "Average Bankruptcies") +
  scale_y_continuous(sec.axis = sec_axis(~ . / ratio, name = "Average Suicide Rate")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(c(avg_Bankruptcies_per_year$Year, avg_Suicide_Rate_per_year$Year)), 
                                  max(c(avg_Bankruptcies_per_year$Year, avg_Suicide_Rate_per_year$Year)), by = 1))

Comparisons

Two Happiest and Two unhappiest countries vs average suicide rate

avg_Suicide_Rate_per_country = data %>%
  group_by(Country_name) %>%
  summarise(avg_suicide_rate = mean(Suicide_Rate, na.rm = TRUE)) %>%
  arrange(avg_suicide_rate) %>% 
  mutate(Row_Number = row_number())

avg_Suicide_Rate_per_country

avg_happiness_per_country <- data %>%
  group_by(Country_name) %>%
  summarise(avg_happiness = mean(Life_ladder, na.rm = TRUE)) %>%
  arrange(desc(avg_happiness))

least_happy =  tail(avg_happiness_per_country, 2)
most_happy = head(avg_happiness_per_country, 2)


avg_Suicide_Rate_per_country %>% 
  filter(Country_name %in% least_happy$Country_name)

# Interpretation: Japan and South Africa are two very unhappy countries. And they also have a high suicide rate

avg_Suicide_Rate_per_country %>% 
  filter(Country_name %in% most_happy$Country_name)

# Interpretation: Finland is a the second most happy country. But is still on place 16/19 when it comes to suicides

Two wealthiest and two poorest countries vs average suicide rate

# Mean GDP per country, richest first. na.rm = TRUE keeps a country with a
# missing GDP value from ending up as NA at the tail of the ranking.
avg_gdp_per_country <- data %>% 
  group_by(Country_name) %>% 
  summarise(avg_gpd = mean(GDP, na.rm = TRUE)) %>% 
  arrange(desc(avg_gpd))

# Bottom two and top two countries by average GDP
least_gdp <- tail(avg_gdp_per_country, 2)
most_gdp <- head(avg_gdp_per_country, 2)

# Suicide-rate rank of the two countries with the lowest average GDP
# (the filter must use `least_gdp`, the object defined above)
avg_Suicide_Rate_per_country %>% 
  filter(Country_name %in% least_gdp$Country_name)

# Suicide-rate rank of the two countries with the highest average GDP
avg_Suicide_Rate_per_country %>% 
  filter(Country_name %in% most_gdp$Country_name)

Two countries with the most and two with the fewest bankruptcies vs average suicide rate


avg_Bankruptcies_per_year <- data %>% 
  group_by (`Country_name`) %>% 
  summarise(avg_bankruptcies = mean(`Value`, na.rm = T)) %>% 
  arrange(desc(avg_bankruptcies))

least_bank =  tail(avg_Bankruptcies_per_year, 2)
most_bank = head(avg_Bankruptcies_per_year, 2)

avg_Suicide_Rate_per_country %>% 
  filter(Country_name %in% least_bank$Country_name)

# Interpretation: Bankruptcies don't have an influence on suicide rates

avg_Suicide_Rate_per_country %>% 
  filter(Country_name %in% most_bank$Country_name)

# Interpretation: Bankruptcies don't have an influence on suicide rates

In depth analysis Germany

Data preparation for Germany

# Prepare data for only Germany
germany_data = data %>% 
  filter(Country_name == "Germany")

Plot GDP Germany

# Plot Germany GDP over Years
avg_gdp_year_germany = germany_data %>% 
  group_by(Year) %>% 
  summarise(avg_gdp = mean(GDP)) 


ggplot(avg_gdp_year_germany, aes(x = Year, y = avg_gdp)) +
  geom_line(color = "blue") +  
  labs(title = "Average GPD Over Time - Germany",
       x = "Year",
       y = "GDP") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_gdp_year_germany$Year), max(avg_gdp_year_germany$Year), by = 1))

Plot Suicide Rate Germany

avg_suicide_year_germany = germany_data %>% 
  group_by(Year) %>% 
  summarise(avg_suicide = mean(Suicide_Rate)) 


ggplot(avg_suicide_year_germany, aes(x = Year, y = avg_suicide)) +
  geom_line(color = "blue") +  
  labs(title = "Average Suicide Rate Over Time - Germany",
       x = "Year",
       y = "Suicide Rate") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_suicide_year_germany$Year), max(avg_suicide_year_germany$Year), by = 1))

Plot Bankruptcies Rate Germany

avg_bank_year_germany = germany_data %>% 
  group_by(Year) %>% 
  summarise(avg_bank = mean(Value)) 


ggplot(avg_bank_year_germany, aes(x = Year, y = avg_bank)) +
  geom_line(color = "blue") +  
  labs(title = "Average bankruptcies Over Time - Germany",
       x = "Year",
       y = "Bankruptcies") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_bank_year_germany$Year), max(avg_bank_year_germany$Year), by = 1))

Plot Happiness Rate Germany

avg_happiness_year_germany = germany_data %>% 
  group_by(Year) %>% 
  summarise(avg_happy = mean(Life_ladder)) 


ggplot(avg_happiness_year_germany, aes(x = Year, y = avg_happy)) +
  geom_line(color = "blue") +  
  labs(title = "Average Happiness Over Time - Germany",
       x = "Year",
       y = "Happiness") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_happiness_year_germany$Year), max(avg_happiness_year_germany$Year), by = 1))

In depth analysis South Africa

Data preparation for South Africa

# Prepare data for only Germany
SA_data = data %>% 
  filter(Country_name == "South Africa")

Plot GDP SA

# Plot Germany GDP over Years
avg_gdp_year_SA = SA_data %>% 
  group_by(Year) %>% 
  summarise(avg_gdp = mean(GDP)) 



ggplot(avg_gdp_year_SA, aes(x = Year, y = avg_gdp)) +
  geom_line(color = "blue") +  
  labs(title = "Average GPD Over Time - SA",
       x = "Year",
       y = "GDP") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_gdp_year_SA$Year), max(avg_gdp_year_SA$Year), by = 1))

Plot Suicide Rate SA

avg_suicide_year_SA = SA_data %>% 
  group_by(Year) %>% 
  summarise(avg_suicide = mean(Suicide_Rate)) 


ggplot(avg_suicide_year_SA, aes(x = Year, y = avg_suicide)) +
  geom_line(color = "blue") +  
  labs(title = "Average Suicide Rate Over Time - SA",
       x = "Year",
       y = "Suicide Rate") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_suicide_year_SA$Year), max(avg_suicide_year_SA$Year), by = 1))

Plot Happiness Rate SA

avg_happiness_year_SA = SA_data %>% 
  group_by(Year) %>% 
  summarise(avg_happy = mean(Life_ladder)) 


ggplot(avg_happiness_year_SA, aes(x = Year, y = avg_happy)) +
  geom_line(color = "blue") +  
  labs(title = "Average Happiness Over Time - SA",
       x = "Year",
       y = "Happiness") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_happiness_year_SA$Year), max(avg_happiness_year_SA$Year), by = 1))

Plot Bankruptcies Rate SA

avg_bank_year_SA = SA_data %>% 
  group_by(Year) %>% 
  summarise(avg_bank = mean(Value)) 


ggplot(avg_bank_year_SA, aes(x = Year, y = avg_bank)) +
  geom_line(color = "blue") +  
  labs(title = "Average bankruptcies Over Time - SA",
       x = "Year",
       y = "Bankruptcies") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_bank_year_SA$Year), max(avg_bank_year_SA$Year), by = 1))

---
title: "Presentation - Don't commit Suicide"
author: "Marckenrold Cadet & Alexander Burkhart"
date: "`r Sys.Date()`"
output: 
   html_notebook:
       toc: true 
       toc_float: true
       toc_depth: 2
       theme: united
       highlight: tango
---

```{r}
library(dplyr)
library(readr)
library(tidyverse)
library(ggplot2)
library(highcharter)
library(magrittr)
```
# EDA
## Data loading
```{r}
# Read the analysis data set straight from the project's GitHub repository.
data <- read_csv("https://raw.githubusercontent.com/Alexburk93/Data_Wrangling_EDA/main/data/suicide_analysis.csv")

# Preview 19 randomly chosen rows; slice_sample() replaces the superseded
# sample_n() and does not error if fewer than 19 rows are available.
data %>% slice_sample(n = 19)
```
## Renaming variables and drop unwanted columns
```{r}
# Give the verbose source columns short snake_case names, then drop the
# columns that are not referenced anywhere else in this notebook
# (Variable, the two ISIC4 duplicates, VAR, MEA and Unit Code).
data <- data %>%
  rename(
    Country_name    = `Country name`,
    Suicide_Rate    = `Age-standardized suicide rate - Sex: both sexes`,
    Life_ladder     = `Life Ladder`,
    Social_support  = `Social support`,
    Life_expectancy = `Healthy life expectancy at birth`,
    Freedom_choices = `Freedom to make life choices`,
    Corruption      = `Perceptions of corruption`,
    Indicator_name  = `Indicator Name`,
    Indicator_code  = `Indicator Code`
  ) %>%
  select(-c(Variable, ISIC4...17, ISIC4...18, VAR, MEA, `Unit Code`))

# Print the cleaned table
data
```
## Data exploration
```{r}
# Preview the first six rows of the cleaned data
data %>% head(n = 6)
```

```{r}
# Make every column name syntactically valid so it can be used without backticks
data <- rename_with(data, make.names)
```

```{r}
# Size of the data frame: row count, column count, then both together
data %>% nrow()
data %>% ncol()
data %>% dim()
```

```{r}
# Compact overview of every column: type and first few values
data %>% str()
```
```{r}
# First rows of the country code plus the suicide-rate, life-ladder,
# social-support, GDP and Value columns (positions 2, 4-6, 12 and 15).
# Selecting by name keeps this correct even if the column order changes.
data %>%
  select(Code, Suicide_Rate, Life_ladder, Social_support, GDP, Value) %>%
  head()
```

```{r}
# Last rows of the country, year and corruption columns (positions 1, 3, 9).
# Selecting by name keeps this correct even if the column order changes.
data %>%
  select(Country_name, Year, Corruption) %>%
  tail()
```

```{r}
# Number of rows recorded for each year
table(data[["Year"]])
```


```{r}
# How many different countries does the data set cover?
nrow(distinct(data, Country_name))
```

```{r}
# List each country once, in order of first appearance
data %>%
  pull(Country_name) %>%
  unique()
```

```{r}
# List each year covered by the data once
unique(data[["Year"]])
```

## Interactive maps
```{r}
# Show two decimals in highcharter tooltips
options(highcharter.tooltip.valueDecimals = 2)

# `data` holds many rows per country (894 rows for 19 countries), but a map
# can colour each country only once. Collapse to one mean GDP per country so
# the colour shown is well defined instead of depending on row order.
gdp_by_country <- data %>%
  group_by(Country_name) %>%
  summarise(GDP = mean(GDP, na.rm = TRUE))

# World choropleth of mean GDP; map shapes are matched to rows by country name.
# NOTE(review): a name join leaves a country blank when its spelling differs
# from the map's `name` property - if that happens, consider joining on the
# ISO code in `Code` instead (TODO confirm against worldgeojson).
hc <- highchart() %>%
  hc_add_series_map(
    worldgeojson, gdp_by_country,
    value = "GDP",
    joinBy = c("name", "Country_name"),
    name = "GDP (current US$)"
  ) %>%
  hc_colorAxis(stops = color_stops()) %>%
  hc_title(text = "World Map") %>%
  hc_subtitle(text = "GDP in current US$ (country average)")

hc
```

```{r}
# Show two decimals in highcharter tooltips
options(highcharter.tooltip.valueDecimals = 2)

# `data` holds many rows per country, but a map can colour each country only
# once, so reduce every mapped indicator to one mean value per country.
country_means <- data %>%
  group_by(Country_name) %>%
  summarise(
    Life_expectancy = mean(Life_expectancy, na.rm = TRUE),
    Suicide_Rate    = mean(Suicide_Rate, na.rm = TRUE),
    Corruption      = mean(Corruption, na.rm = TRUE)
  )

# Build one world choropleth.
#   df          one row per country, with a `Country_name` column
#   value_col   name (string) of the column used for the colour scale
#   series_name label shown for the series in the tooltip
#   subtitle    text shown below the "World Map" title
# Returns the highchart widget.
build_world_map <- function(df, value_col, series_name, subtitle) {
  highchart() %>%
    hc_add_series_map(
      worldgeojson, df,
      value = value_col,
      joinBy = c("name", "Country_name"),
      name = series_name
    ) %>%
    hc_colorAxis(stops = color_stops()) %>%
    hc_title(text = "World Map") %>%
    hc_subtitle(text = subtitle)
}

hc_life_expectancy <- build_world_map(
  country_means, "Life_expectancy",
  "Life Expectancy", "Life Expectancy (country average)"
)
hc_suicide_rates <- build_world_map(
  country_means, "Suicide_Rate",
  "Suicide Rates", "Suicide Rate (country average)"
)
hc_corruption <- build_world_map(
  country_means, "Corruption",
  "Corruption", "Corruption (country average)"
)

# Print each widget as its own top-level expression: a plain list() of
# widgets is printed as a list ([[1]], [[2]], ...) instead of being rendered.
hc_life_expectancy
hc_suicide_rates
hc_corruption

```

# AVG GDP over years
## Calculation 
```{r}
# Mean GDP over all rows of each year.
# na.rm = TRUE matches the other yearly averages, so a single missing GDP
# value cannot turn a whole year (and the axis-scaling ratio built from this
# table later on) into NA.
# The column keeps its existing spelling `avg_gpd` because later chunks use it.
avg_gdp_per_year <- data %>%
  group_by(Year) %>%
  summarise(avg_gpd = mean(GDP, na.rm = TRUE))

avg_gdp_per_year
```
## Plot 
```{r}
# Line chart of the yearly GDP average with one x-axis tick per year
avg_gdp_per_year %>%
  ggplot(aes(Year, avg_gpd)) +
  geom_line(colour = "blue") +
  scale_x_continuous(
    breaks = seq(
      from = min(avg_gdp_per_year$Year),
      to = max(avg_gdp_per_year$Year),
      by = 1
    )
  ) +
  ggtitle("Average GDP Over Time worldwide") +
  xlab("Year") +
  ylab("GDP") +
  theme_minimal() +
  # Tilt the year labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
# AVG Happiness over years
## Calculation 
```{r}
# Mean life-ladder (happiness) score over all rows of each year, ignoring
# missing values. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
# The column keeps its existing spelling `avg_happinnes` because later chunks use it.
avg_happiness_per_year <- data %>%
  group_by(Year) %>%
  summarise(avg_happinnes = mean(Life_ladder, na.rm = TRUE))

avg_happiness_per_year
```
## Plot 

```{r}
# Line chart of the yearly happiness average with one x-axis tick per year
avg_happiness_per_year %>%
  ggplot(aes(Year, avg_happinnes)) +
  geom_line(colour = "blue") +
  scale_x_continuous(
    breaks = seq(
      from = min(avg_happiness_per_year$Year),
      to = max(avg_happiness_per_year$Year),
      by = 1
    )
  ) +
  ggtitle("Average Happiness Over Time worldwide") +
  xlab("Year") +
  ylab("Happiness") +
  theme_minimal() +
  # Tilt the year labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

# AVG Suicide Rates over years
## Calculation 
```{r}
# Mean suicide rate over all rows of each year, ignoring missing values.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
avg_Suicide_Rate_per_year <- data %>%
  group_by(Year) %>%
  summarise(avg_Suicide_Rate = mean(Suicide_Rate, na.rm = TRUE))

avg_Suicide_Rate_per_year
```
## Plot 

```{r}
# Line chart of the yearly suicide-rate average with one x-axis tick per year
avg_Suicide_Rate_per_year %>%
  ggplot(aes(Year, avg_Suicide_Rate)) +
  geom_line(colour = "blue") +
  scale_x_continuous(
    breaks = seq(
      from = min(avg_Suicide_Rate_per_year$Year),
      to = max(avg_Suicide_Rate_per_year$Year),
      by = 1
    )
  ) +
  ggtitle("Average Suicide Rate Over Time worldwide") +
  xlab("Year") +
  ylab("Suicide Rate") +
  theme_minimal() +
  # Tilt the year labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```
# AVG Bankruptcies over years
## Calculation 
```{r}
# Mean of the `Value` column (used as the bankruptcies measure in this
# notebook) over all rows of each year, ignoring missing values.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
avg_Bankruptcies_per_year <- data %>%
  group_by(Year) %>%
  summarise(avg_Bankruptcies = mean(Value, na.rm = TRUE))

avg_Bankruptcies_per_year
```
## Plot 

```{r}
# Line chart of the yearly bankruptcies average with one x-axis tick per year
avg_Bankruptcies_per_year %>%
  ggplot(aes(Year, avg_Bankruptcies)) +
  geom_line(colour = "blue") +
  scale_x_continuous(
    breaks = seq(
      from = min(avg_Bankruptcies_per_year$Year),
      to = max(avg_Bankruptcies_per_year$Year),
      by = 1
    )
  ) +
  ggtitle("Average Bankruptcies Over Time worldwide") +
  xlab("Year") +
  ylab("Bankruptcies") +
  theme_minimal() +
  # Tilt the year labels so they do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
```

# Plot Average GDP and Average Suicide Rate over years
```{r}
# Scale factor that stretches the suicide rates to the height of the GDP bars
# so both series fit in one panel; the secondary axis divides by it again.
ratio <- max(avg_gdp_per_year$avg_gpd) / max(avg_Suicide_Rate_per_year$avg_Suicide_Rate)

ggplot() +
  # Bars: average GDP per year (primary y-axis)
  geom_col(data = avg_gdp_per_year, aes(x = Year, y = avg_gpd),
           fill = "skyblue", width = 0.2) +
  # Line: average suicide rate per year, rescaled onto the GDP axis.
  # `linewidth` replaces `size`, which is deprecated for lines since ggplot2 3.4.0.
  geom_line(data = avg_Suicide_Rate_per_year, aes(x = Year, y = avg_Suicide_Rate * ratio),
            color = "red", linewidth = 1.5) +
  labs(title = "Average GDP and Suicide Rate Over Time",
       x = "Year",
       y = "Average GDP") +
  # Secondary axis undoes the rescaling so it reads in suicide-rate units
  scale_y_continuous(sec.axis = sec_axis(~ . / ratio, name = "Average Suicide Rate")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # One tick per year across the range covered by either series
  scale_x_continuous(breaks = seq(min(c(avg_gdp_per_year$Year, avg_Suicide_Rate_per_year$Year)),
                                  max(c(avg_gdp_per_year$Year, avg_Suicide_Rate_per_year$Year)), by = 1))

```

# Plot Average Happiness and Average Suicide Rate over years
```{r}
# Scale factor that stretches the suicide rates to the height of the happiness
# bars so both series fit in one panel; the secondary axis divides by it again.
ratio <- max(avg_happiness_per_year$avg_happinnes) / max(avg_Suicide_Rate_per_year$avg_Suicide_Rate)

ggplot() +
  # Bars: average happiness per year (primary y-axis)
  geom_col(data = avg_happiness_per_year, aes(x = Year, y = avg_happinnes),
           fill = "skyblue", width = 0.2) +
  # Line: average suicide rate per year, rescaled onto the happiness axis.
  # `linewidth` replaces `size`, which is deprecated for lines since ggplot2 3.4.0.
  geom_line(data = avg_Suicide_Rate_per_year, aes(x = Year, y = avg_Suicide_Rate * ratio),
            color = "red", linewidth = 1.5) +
  # Labels with the "Happinness" misspelling corrected
  labs(title = "Average Happiness and Suicide Rate Over Time",
       x = "Year",
       y = "Average Happiness") +
  # Secondary axis undoes the rescaling so it reads in suicide-rate units
  scale_y_continuous(sec.axis = sec_axis(~ . / ratio, name = "Average Suicide Rate")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # One tick per year across the range covered by either series
  scale_x_continuous(breaks = seq(min(c(avg_happiness_per_year$Year, avg_Suicide_Rate_per_year$Year)),
                                  max(c(avg_happiness_per_year$Year, avg_Suicide_Rate_per_year$Year)), by = 1))

```

# Plot Average Bankruptcies and Average Suicide Rate over years
```{r}
# Scale factor that stretches the suicide rates to the height of the
# bankruptcies bars so both series fit in one panel; the secondary axis
# divides by it again.
ratio <- max(avg_Bankruptcies_per_year$avg_Bankruptcies) / max(avg_Suicide_Rate_per_year$avg_Suicide_Rate)

ggplot() +
  # Bars: average bankruptcies per year (primary y-axis)
  geom_col(data = avg_Bankruptcies_per_year, aes(x = Year, y = avg_Bankruptcies),
           fill = "skyblue", width = 0.2) +
  # Line: average suicide rate per year, rescaled onto the bankruptcies axis.
  # `linewidth` replaces `size`, which is deprecated for lines since ggplot2 3.4.0.
  geom_line(data = avg_Suicide_Rate_per_year, aes(x = Year, y = avg_Suicide_Rate * ratio),
            color = "red", linewidth = 1.5) +
  labs(title = "Average Bankruptcies and Suicide Rate Over Time",
       x = "Year",
       y = "Average Bankruptcies") +
  # Secondary axis undoes the rescaling so it reads in suicide-rate units
  scale_y_continuous(sec.axis = sec_axis(~ . / ratio, name = "Average Suicide Rate")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # One tick per year across the range covered by either series
  scale_x_continuous(breaks = seq(min(c(avg_Bankruptcies_per_year$Year, avg_Suicide_Rate_per_year$Year)),
                                  max(c(avg_Bankruptcies_per_year$Year, avg_Suicide_Rate_per_year$Year)), by = 1))

```

# Comparisons

## Two Happiest and Two unhappiest countries vs average suicide rate
```{r}
# Mean suicide rate per country, sorted ascending; Row_Number is the rank
# (1 = lowest average suicide rate).
avg_Suicide_Rate_per_country <- data %>%
  group_by(Country_name) %>%
  summarise(avg_suicide_rate = mean(Suicide_Rate, na.rm = TRUE)) %>%
  arrange(avg_suicide_rate) %>%
  mutate(Row_Number = seq_len(n()))

avg_Suicide_Rate_per_country

# Mean happiness (life ladder) per country, happiest first
avg_happiness_per_country <- data %>%
  group_by(Country_name) %>%
  summarise(avg_happiness = mean(Life_ladder, na.rm = TRUE)) %>%
  arrange(desc(avg_happiness))

# Bottom two and top two rows of the happiness ranking
least_happy <- slice_tail(avg_happiness_per_country, n = 2)
most_happy <- slice_head(avg_happiness_per_country, n = 2)

# Suicide-rate rank of the two least happy countries
filter(avg_Suicide_Rate_per_country, Country_name %in% least_happy$Country_name)

# Interpretation: Japan and South Africa are two very unhappy countries, and they also have a high suicide rate

# Suicide-rate rank of the two happiest countries
filter(avg_Suicide_Rate_per_country, Country_name %in% most_happy$Country_name)

# Interpretation: Finland is the second happiest country, but still ranks 16th of 19 for suicides

```

## Two wealthiest and two poorest countries vs average suicide rate
```{r}
# Mean GDP per country, richest first. na.rm = TRUE keeps a country with a
# missing GDP value from becoming NA, which arrange() would sort to the end
# and tail() would then report as "least GDP".
avg_gdp_per_country <- data %>%
  group_by(Country_name) %>%
  summarise(avg_gpd = mean(GDP, na.rm = TRUE)) %>%
  arrange(desc(avg_gpd))

# Bottom two and top two countries by average GDP
least_gdp <- tail(avg_gdp_per_country, 2)
most_gdp <- head(avg_gdp_per_country, 2)

# Suicide-rate rank of the two countries with the lowest average GDP
avg_Suicide_Rate_per_country %>%
  filter(Country_name %in% least_gdp$Country_name)

# Interpretation: New Zealand and Iceland are two very small countries.

# Suicide-rate rank of the two countries with the highest average GDP
avg_Suicide_Rate_per_country %>%
  filter(Country_name %in% most_gdp$Country_name)

# Interpretation: The two biggest economies are at the lower end of suicide rates
```


## Two countries with the most and two with the fewest bankruptcies vs average suicide rate
```{r}

# Mean of `Value` (the bankruptcies measure) per country, highest first.
# Stored under its own name: reusing `avg_Bankruptcies_per_year` here would
# overwrite the per-year table that the yearly plots read (`$Year`,
# `$avg_Bankruptcies`) and break them when those chunks are re-run.
avg_bankruptcies_per_country <- data %>%
  group_by(Country_name) %>%
  summarise(avg_bankruptcies = mean(Value, na.rm = TRUE)) %>%
  arrange(desc(avg_bankruptcies))

# Bottom two and top two countries by average bankruptcies value
least_bank <- tail(avg_bankruptcies_per_country, 2)
most_bank <- head(avg_bankruptcies_per_country, 2)

# Suicide-rate rank of the two countries with the lowest bankruptcies value
avg_Suicide_Rate_per_country %>%
  filter(Country_name %in% least_bank$Country_name)

# Interpretation: Bankruptcies don't have an influence on suicide rates

# Suicide-rate rank of the two countries with the highest bankruptcies value
avg_Suicide_Rate_per_country %>%
  filter(Country_name %in% most_bank$Country_name)

# Interpretation: Bankruptcies don't have an influence on suicide rates
```
# In depth analysis Germany
## Data preparation for Germany
```{r}
# Subset of `data` that contains only the rows for Germany
germany_data <- filter(data, Country_name == "Germany")
```

## Plot GDP Germany
```{r}
# Yearly mean GDP for Germany (each year has several rows).
# na.rm = TRUE keeps a single missing value from blanking a whole year.
avg_gdp_year_germany <- germany_data %>%
  group_by(Year) %>%
  summarise(avg_gdp = mean(GDP, na.rm = TRUE))

# Line chart with one x-axis tick per year (title typo "GPD" corrected)
ggplot(avg_gdp_year_germany, aes(x = Year, y = avg_gdp)) +
  geom_line(color = "blue") +
  labs(title = "Average GDP Over Time - Germany",
       x = "Year",
       y = "GDP") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_gdp_year_germany$Year), max(avg_gdp_year_germany$Year), by = 1))

```

## Plot Suicide Rate Germany
```{r}
# Yearly mean suicide rate for Germany (each year has several rows).
# na.rm = TRUE keeps a single missing value from blanking a whole year.
avg_suicide_year_germany <- germany_data %>%
  group_by(Year) %>%
  summarise(avg_suicide = mean(Suicide_Rate, na.rm = TRUE))

# Line chart with one x-axis tick per year
ggplot(avg_suicide_year_germany, aes(x = Year, y = avg_suicide)) +
  geom_line(color = "blue") +
  labs(title = "Average Suicide Rate Over Time - Germany",
       x = "Year",
       y = "Suicide Rate") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_suicide_year_germany$Year), max(avg_suicide_year_germany$Year), by = 1))
```
## Plot Bankruptcies Rate Germany
```{r}
# Yearly mean of `Value` (the bankruptcies measure) for Germany.
# na.rm = TRUE keeps a single missing value from blanking a whole year.
avg_bank_year_germany <- germany_data %>%
  group_by(Year) %>%
  summarise(avg_bank = mean(Value, na.rm = TRUE))

# Line chart with one x-axis tick per year
ggplot(avg_bank_year_germany, aes(x = Year, y = avg_bank)) +
  geom_line(color = "blue") +
  labs(title = "Average bankruptcies Over Time - Germany",
       x = "Year",
       y = "Bankruptcies") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_bank_year_germany$Year), max(avg_bank_year_germany$Year), by = 1))
```
## Plot Happiness Rate Germany
```{r}
# Yearly mean life-ladder (happiness) score for Germany.
# na.rm = TRUE matches the worldwide happiness average and keeps a single
# missing value from blanking a whole year.
avg_happiness_year_germany <- germany_data %>%
  group_by(Year) %>%
  summarise(avg_happy = mean(Life_ladder, na.rm = TRUE))

# Line chart with one x-axis tick per year
ggplot(avg_happiness_year_germany, aes(x = Year, y = avg_happy)) +
  geom_line(color = "blue") +
  labs(title = "Average Happiness Over Time - Germany",
       x = "Year",
       y = "Happiness") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_happiness_year_germany$Year), max(avg_happiness_year_germany$Year), by = 1))
```
# In depth analysis South Africa
## Data preparation for South Africa
```{r}
# Subset of `data` that contains only the rows for South Africa
SA_data <- filter(data, Country_name == "South Africa")
```

## Plot GDP SA
```{r}
# Yearly mean GDP for South Africa (each year has several rows).
# na.rm = TRUE keeps a single missing value from blanking a whole year.
avg_gdp_year_SA <- SA_data %>%
  group_by(Year) %>%
  summarise(avg_gdp = mean(GDP, na.rm = TRUE))

# Line chart with one x-axis tick per year (title typo "GPD" corrected)
ggplot(avg_gdp_year_SA, aes(x = Year, y = avg_gdp)) +
  geom_line(color = "blue") +
  labs(title = "Average GDP Over Time - SA",
       x = "Year",
       y = "GDP") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_gdp_year_SA$Year), max(avg_gdp_year_SA$Year), by = 1))

```
## Plot Suicide Rate SA
```{r}
# Yearly mean suicide rate for South Africa (each year has several rows).
# na.rm = TRUE keeps a single missing value from blanking a whole year.
avg_suicide_year_SA <- SA_data %>%
  group_by(Year) %>%
  summarise(avg_suicide = mean(Suicide_Rate, na.rm = TRUE))

# Line chart with one x-axis tick per year
ggplot(avg_suicide_year_SA, aes(x = Year, y = avg_suicide)) +
  geom_line(color = "blue") +
  labs(title = "Average Suicide Rate Over Time - SA",
       x = "Year",
       y = "Suicide Rate") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_suicide_year_SA$Year), max(avg_suicide_year_SA$Year), by = 1))
```

## Plot Happiness Rate SA
```{r}
# Yearly mean life-ladder (happiness) score for South Africa.
# na.rm = TRUE matches the worldwide happiness average and keeps a single
# missing value from blanking a whole year.
avg_happiness_year_SA <- SA_data %>%
  group_by(Year) %>%
  summarise(avg_happy = mean(Life_ladder, na.rm = TRUE))

# Line chart with one x-axis tick per year
ggplot(avg_happiness_year_SA, aes(x = Year, y = avg_happy)) +
  geom_line(color = "blue") +
  labs(title = "Average Happiness Over Time - SA",
       x = "Year",
       y = "Happiness") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_happiness_year_SA$Year), max(avg_happiness_year_SA$Year), by = 1))
```

## Plot Bankruptcies Rate SA
```{r}
# Yearly mean of `Value` (the bankruptcies measure) for South Africa.
# na.rm = TRUE keeps a single missing value from blanking a whole year.
avg_bank_year_SA <- SA_data %>%
  group_by(Year) %>%
  summarise(avg_bank = mean(Value, na.rm = TRUE))

# Line chart with one x-axis tick per year
ggplot(avg_bank_year_SA, aes(x = Year, y = avg_bank)) +
  geom_line(color = "blue") +
  labs(title = "Average bankruptcies Over Time - SA",
       x = "Year",
       y = "Bankruptcies") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_continuous(breaks = seq(min(avg_bank_year_SA$Year), max(avg_bank_year_SA$Year), by = 1))
```